In this lab, we will use a Convolutional Neural Network to classify horizontal and vertical lines.
In [ ]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
import matplotlib.pylab as plt
import numpy as np
import pandas as pd
In [ ]:
torch.manual_seed(4)
plot_channels: plot out the parameters of the convolutional layers
In [ ]:
def plot_channels(W):
    # number of output channels
    n_out = W.shape[0]
    # number of input channels
    n_in = W.shape[1]
    w_min = W.min().item()
    w_max = W.max().item()
    fig, axes = plt.subplots(n_out, n_in)
    fig.subplots_adjust(hspace=0.1)
    out_index = 0
    in_index = 0
    # plot outputs as rows, inputs as columns
    for ax in axes.flat:
        if in_index > n_in - 1:
            out_index = out_index + 1
            in_index = 0
        ax.imshow(W[out_index, in_index, :, :], vmin=w_min, vmax=w_max, cmap='seismic')
        ax.set_yticklabels([])
        ax.set_xticklabels([])
        in_index = in_index + 1
    plt.show()
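As a quick sanity check, we can call plot_channels on a randomly initialized tensor; the shape (2, 1, 2, 2) below is an illustrative assumption matching two output channels, one input channel and a 2 x 2 kernel. The real kernels are plotted later in the lab.
In [ ]:
# hypothetical example: visualize a random kernel tensor of shape (n_out=2, n_in=1, 2, 2)
plot_channels(torch.randn(2, 1, 2, 2))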
show_data: plot out a data sample
In [ ]:
def show_data(dataset, sample):
    plt.imshow(dataset.x[sample, 0, :, :].numpy(), cmap='gray')
    plt.title('y=' + str(dataset.y[sample].item()))
    plt.show()
Create some toy data
In [ ]:
from torch.utils.data import Dataset, DataLoader
class Data(Dataset):
    def __init__(self, N_images=100, offset=0, p=0.9, train=False):
        """
        p: probability that a pixel is white
        N_images: number of images
        offset: maximum random vertical and horizontal offset of a sample; should be less than 3
        """
        if train:
            np.random.seed(1)
        # make the number of images even
        N_images = 2 * (N_images // 2)
        images = np.zeros((N_images, 1, 11, 11))
        start1 = 3
        start2 = 1
        self.y = torch.zeros(N_images).type(torch.long)
        for n in range(N_images):
            if offset > 0:
                low = int(np.random.randint(low=start1, high=start1 + offset, size=1))
                high = int(np.random.randint(low=start2, high=start2 + offset, size=1))
            else:
                low = 4
                high = 1
            if n <= N_images // 2:
                self.y[n] = 0
                images[n, 0, high:high + 9, low:low + 3] = np.random.binomial(1, p, (9, 3))
            elif n > N_images // 2:
                self.y[n] = 1
                images[n, 0, low:low + 3, high:high + 9] = np.random.binomial(1, p, (3, 9))
        self.x = torch.from_numpy(images).type(torch.FloatTensor)
        self.len = self.x.shape[0]
        del images
        np.random.seed(0)

    def __getitem__(self, index):
        return self.x[index], self.y[index]

    def __len__(self):
        return self.len
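To see what the constructor produces, here is a minimal sketch; the small sizes below are arbitrary, chosen only for inspection.
In [ ]:
# hypothetical quick check: four images, each 1 x 11 x 11, with long labels
tiny = Data(N_images=4, offset=2)
print(tiny.x.shape)  # torch.Size([4, 1, 11, 11])
print(tiny.y)        # tensor of 0s (vertical lines) and 1s (horizontal lines)
print(len(tiny))     # 4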
plot_activations: plot out the activations of the convolutional layers
In [ ]:
def plot_activations(A,number_rows= 1,name=""):
A=A[0,:,:,:].detach().numpy()
n_activations=A.shape[0]
print(n_activations)
A_min=A.min().item()
A_max=A.max().item()
if n_activations==1:
# Plot the image.
plt.imshow(A[0,:], vmin=A_min, vmax=A_max, cmap='seismic')
else:
fig, axes = plt.subplots(number_rows, n_activations//number_rows)
fig.subplots_adjust(hspace = 0.4)
for i,ax in enumerate(axes.flat):
if i< n_activations:
# Set the label for the sub-plot.
ax.set_xlabel( "activation:{0}".format(i+1))
# Plot the image.
ax.imshow(A[i,:], vmin=A_min, vmax=A_max, cmap='seismic')
ax.set_xticks([])
ax.set_yticks([])
plt.show()
Utility function for computing the output shape of a convolution; it takes a tuple (h, w) and returns a tuple (h, w)
In [ ]:
def conv_output_shape(h_w, kernel_size=1, stride=1, pad=0, dilation=1):
    # by Duane Nielsen
    from math import floor
    if type(kernel_size) is not tuple:
        kernel_size = (kernel_size, kernel_size)
    h = floor(((h_w[0] + (2 * pad) - (dilation * (kernel_size[0] - 1)) - 1) / stride) + 1)
    w = floor(((h_w[1] + (2 * pad) - (dilation * (kernel_size[1] - 1)) - 1) / stride) + 1)
    return h, w
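We can check the helper against an actual nn.Conv2d layer; the comparison layer below is an assumption for illustration, not part of the lab's model.
In [ ]:
# compare the formula with the shape PyTorch actually produces
check_conv = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=2, stride=1, padding=0)
print(check_conv(torch.zeros(1, 1, 11, 11)).shape[2:])              # torch.Size([10, 10])
print(conv_output_shape((11, 11), kernel_size=2, stride=1, pad=0))  # (10, 10)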
Load the training dataset with 10000 samples
In [ ]:
N_images=10000
train_dataset=Data(N_images=N_images,train=True)
Load the validation dataset
In [ ]:
validation_dataset=Data(N_images=1000,train=False)
validation_dataset
The labels have type long. Each element in the image tensor is a number representing a pixel intensity, as the following samples demonstrate.
Plot a sample with label y=0 (a vertical line)
In [ ]:
show_data(train_dataset,0)
Plot a sample with label y=1 (a horizontal line)
In [ ]:
show_data(train_dataset,N_images//2+2)
The input image is 11 x 11. Each convolution and pooling operation changes the size of the activations. The following lines of code compute the size of the image at each stage before we reach the fully connected layer, given the parameters kernel_size, stride and pad; the underlying formula is floor((size + 2*pad - dilation*(kernel_size - 1) - 1)/stride + 1).
In [ ]:
out=conv_output_shape((11,11), kernel_size=2, stride=1, pad=0, dilation=1)
print(out)
out1=conv_output_shape(out, kernel_size=2, stride=1, pad=0, dilation=1)
print(out1)
out2=conv_output_shape(out1, kernel_size=2, stride=1, pad=0, dilation=1)
print(out2)
out3=conv_output_shape(out2, kernel_size=2, stride=1, pad=0, dilation=1)
print(out3)
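The four calls above correspond to the stages conv1, pool1, conv2, pool2 (each with kernel size 2 and stride 1), since the same shape formula also applies to max pooling. A small check, using an nn.MaxPool2d layer assumed here for illustration:
In [ ]:
# MaxPool2d with kernel_size=2 and stride=1 shrinks each side by 1, like the convolutions above
check_pool = nn.MaxPool2d(kernel_size=2, stride=1)
print(check_pool(torch.zeros(1, 1, 10, 10)).shape[2:])  # torch.Size([9, 9])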
Build a convolutional network class with two convolutional layers and one fully connected layer. Pre-determine the size of the final output matrix. The parameters in the constructor are the numbers of output channels for the first and second layers.
In [ ]:
class CNN(nn.Module):
    def __init__(self, out_1=2, out_2=1):
        super(CNN, self).__init__()
        # first convolutional layer
        self.cnn1 = nn.Conv2d(in_channels=1, out_channels=out_1, kernel_size=2, padding=0)
        # activation function
        self.relu1 = nn.ReLU()
        # max pooling
        self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=1)
        # second convolutional layer
        self.cnn2 = nn.Conv2d(in_channels=out_1, out_channels=out_2, kernel_size=2, stride=1, padding=0)
        # activation function
        self.relu2 = nn.ReLU()
        # max pooling
        self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=1)
        # fully connected layer
        self.fc1 = nn.Linear(out_2 * 7 * 7, 2)

    def forward(self, x):
        # first convolutional layer
        out = self.cnn1(x)
        # activation function
        out = self.relu1(out)
        # max pooling
        out = self.maxpool1(out)
        # second convolutional layer
        out = self.cnn2(out)
        # activation function
        out = self.relu2(out)
        # max pooling
        out = self.maxpool2(out)
        # flatten output
        out = out.view(out.size(0), -1)
        # fully connected layer
        out = self.fc1(out)
        return out

    def activations(self, x):
        # returns the intermediate activations; not needed for training, only for visualization
        z1 = self.cnn1(x)
        a1 = self.relu1(z1)
        out = self.maxpool1(a1)
        z2 = self.cnn2(out)
        a2 = self.relu2(z2)
        out = self.maxpool2(a2)
        out = out.view(out.size(0), -1)
        return z1, a1, z2, a2, out
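Before training, a quick forward pass on a dummy input confirms the 7 x 7 size computed earlier; the throwaway model and zero tensor below are assumptions for illustration.
In [ ]:
# a dummy batch of one 11 x 11 image should produce two class scores
check_model = CNN(out_1=2, out_2=1)
print(check_model(torch.zeros(1, 1, 11, 11)).shape)  # torch.Size([1, 2])
# the flattened activation fed to fc1 has out_2 * 7 * 7 = 49 elements
print(check_model.activations(torch.zeros(1, 1, 11, 11))[4].shape)  # torch.Size([1, 49])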
There are 2 output channels for the first layer, and 1 output channel for the second layer
In [ ]:
model=CNN(2,1)
Print out the model structure by evaluating the object
In [ ]:
model
Plot the kernel parameters before training. The kernels are initialized randomly.
In [ ]:
plot_channels(model.state_dict()['cnn1.weight'])
In [ ]:
plot_channels(model.state_dict()['cnn2.weight'])
Define the loss function
In [ ]:
criterion=nn.CrossEntropyLoss()
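nn.CrossEntropyLoss expects raw logits of shape (batch, classes) and labels of type long; here is a minimal sketch with made-up logits.
In [ ]:
# hypothetical logits strongly favoring class 0, paired with the correct label
z_example = torch.tensor([[2.0, -1.0]])
y_example = torch.tensor([0])
print(criterion(z_example, y_example))  # small loss, since the prediction is right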
Define the optimizer
In [ ]:
learning_rate=0.001
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
Define the training and validation data loaders
In [ ]:
train_loader=torch.utils.data.DataLoader(dataset=train_dataset,batch_size=10)
validation_loader=torch.utils.data.DataLoader(dataset=validation_dataset,batch_size=20)
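Pulling one batch confirms the shapes the model will see; this inspection cell is an optional addition.
In [ ]:
# each training batch holds 10 images of shape 1 x 11 x 11 and 10 long labels
x_batch, y_batch = next(iter(train_loader))
print(x_batch.shape, y_batch.shape)  # torch.Size([10, 1, 11, 11]) torch.Size([10])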
Train the model and determine validation accuracy
In [ ]:
n_epochs = 10
loss_list = []
accuracy_list = []
N_test = len(validation_dataset)
# loop over epochs
for epoch in range(n_epochs):
    for x, y in train_loader:
        # clear gradients
        optimizer.zero_grad()
        # make a prediction
        z = model(x)
        # calculate loss
        loss = criterion(z, y)
        # calculate gradients of parameters
        loss.backward()
        # update parameters
        optimizer.step()
    correct = 0
    # perform a prediction on the validation data
    for x_test, y_test in validation_loader:
        z = model(x_test)
        _, yhat = torch.max(z.data, 1)
        correct += (yhat == y_test).sum().item()
    accuracy = correct / N_test
    accuracy_list.append(accuracy)
    loss_list.append(loss.item())
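As a side note, the validation pass above still tracks gradients it never uses; a common refinement, sketched below with the same loaders, is to wrap evaluation in torch.no_grad().
In [ ]:
# evaluation sketch: identical accuracy computation, but without gradient bookkeeping
correct = 0
with torch.no_grad():
    for x_test, y_test in validation_loader:
        _, yhat = torch.max(model(x_test), 1)
        correct += (yhat == y_test).sum().item()
print('validation accuracy:', correct / N_test)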
Plot the loss and accuracy on the validation data:
In [ ]:
fig, ax1 = plt.subplots()
color = 'tab:red'
ax1.plot(loss_list,color=color)
ax1.set_xlabel('epoch',color=color)
ax1.set_ylabel('total loss',color=color)
ax1.tick_params(axis='y', color=color)
ax2 = ax1.twinx()
color = 'tab:blue'
ax2.set_ylabel('accuracy', color=color)
ax2.plot( accuracy_list, color=color)
ax2.tick_params(axis='y', labelcolor=color)
fig.tight_layout()
View the learned parameters of the convolutional layers
In [ ]:
model.state_dict()['cnn1.weight']
In [ ]:
plot_channels(model.state_dict()['cnn1.weight'])
In [ ]:
model.state_dict()['cnn2.weight']
In [ ]:
plot_channels(model.state_dict()['cnn2.weight'])
Consider the following sample
In [ ]:
show_data(train_dataset,N_images//2+2)
Determine the activations
In [ ]:
out=model.activations(train_dataset[N_images//2+2][0].view(1,1,11,11))
Plot out the activation maps: out[0] is the output of the first convolutional layer (z1), out[2] is the output of the second convolutional layer (z2), and out[3] is the second layer's output after the ReLU activation (a2).
In [ ]:
plot_activations(out[0],number_rows=1,name="first feature map")
plt.show()
In [ ]:
plot_activations(out[2],number_rows=1,name="second feature map")
plt.show()
In [ ]:
plot_activations(out[3],number_rows=1,name="second feature map after activation")
plt.show()
Save the output of the activation after flattening
In [ ]:
out1=out[4][0].detach().numpy()
Try the same thing for a sample where y=0
In [ ]:
out0=model.activations(train_dataset[100][0].view(1,1,11,11))[4][0].detach().numpy()
out0
In [ ]:
plt.subplot(2, 1, 1)
plt.plot( out1, 'b')
plt.title('Flattened Activation Values')
plt.ylabel('Activation')
plt.xlabel('index')
plt.subplot(2, 1, 2)
plt.plot(out0, 'r')
plt.xlabel('index')
plt.ylabel('Activation')
plt.tight_layout()
plt.show()
Joseph Santarcangelo has a PhD in Electrical Engineering. His research focused on using machine learning, signal processing, and computer vision to determine how videos impact human cognition.
Other contributors: Michelle Carey, Mavis Zhou
Copyright © 2018 cognitiveclass.ai. This notebook and its source code are released under the terms of the MIT License.